Dans ce fichier, j’utilise les gènes que l’on a déterminé après l’étude des données RNAseq comme étant Activé en présence de SvbACT et les gènes considérés comme étant réprimés en présence de SvbREP.

Toutes les figures et tableaux sont disponibles ici (https://mycore.core-cloud.net/index.php/s/dBITIcJgQx6P7Fp) ou ici (https://www.dropbox.com/sh/irhqsmaemsicwqn/AAA34FiarJPLLjoOZ3wQP1w6a?dl=0) pour le script et le RData ( pasassez de place sur la dropbox et MyCore ne fonctionne plus )

#chargement des données RNAseq 
load("edgeR_Result.RData")
#chargement des packages 
library(pheatmap)
library(chipenrich)
library(RColorBrewer)
library(vioplot)
library(VennDiagram)
library(ChIPpeakAnno)
library(TxDb.Dmelanogaster.UCSC.dm6.ensGene)
library(DT)
library(plotly)
'%!in%' <- function(x,y)!('%in%'(x,y))

1 Création des groupes de gènes cibles

downREP_FP = rownames(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsAct$logFC < 0 & lrt.2.tables$ActVsRep$BH.bin == 1]
upACT_FP = rownames(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC > 0 & lrt.2.tables$ActVsRep$BH.bin == 1]
pvalue_Act_REP = rownames(lrt.2.tables$ActVsRep)[lrt.2.tables$ActVsRep$BH.bin == 1]
updown_Svb = row.names(lrt.2.tables$ActVsRep)[lrt.2.tables$ActVsContr$logFC > 0 & lrt.2.tables$RepVsContr$logFC < 0 & lrt.2.tables$ActVsRep$BH.bin == 1]
gene_cible_updown = upACT_FP%in%downREP_FP

downREP_FP_tab = lrt.2.tables$RepVsContr[lrt.2.tables$RepVsAct$logFC < 0,]



updown_Svb_tab= as.data.frame( round( cbind(lrt.2.tables$ActVsRep$logFC[rownames(lrt.2.tables$ActVsRep) %in% updown_Svb], lrt.2.tables$ActVsContr$logFC[rownames(lrt.2.tables$ActVsContr) %in% updown_Svb],lrt.2.tables$RepVsContr$logFC[rownames(lrt.2.tables$RepVsContr)%in%updown_Svb]) , 2) , row.names = updown_Svb ) 
colnames(updown_Svb_tab) = c("ActvsRep","ActvsCtrl","RepvsCtrl")
updown_Svb_tab_order = updown_Svb_tab[,2:3][order(updown_Svb_tab$ActvsRep , decreasing = T),]

datatable(updown_Svb_tab_order)
# Création des heatmap {.tabset .tabset-fade .tabset-pills}

#Ici je crée des heatmaps permettant de voir les gènes ***cibles*** et leur logFC dans les différentes conditions *Act/Ctrl*; *Rep/Ctrl* et *Act/Rep*. 
# breaksList = seq(-7, 7, by = 1)
# pheatmap(updown_Svb_tab_order, main="Heatmap", color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = "/Users/alexmanchenoferris/Documents/Chipseq_ActRep/heatmap/heatmap_updown.pdf" )
# 
# pheatmap(updown_Svb_tab_order, main="Heatmap", color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", silent = F )
# 
# pheatmap(updown_Svb_tab_order[1:100,], main="Heatmap", color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = "/Users/alexmanchenoferris/Documents/Chipseq_ActRep/heatmap/heatmap_updown_100premiers.pdf" )
# 
# pheatmap(updown_Svb_tab_order[1:57,], main="Heatmap", color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = "/Users/alexmanchenoferris/Documents/Chipseq_ActRep/heatmap/heatmap_updown_57premiers.pdf" )
# 
# pheatmap(updown_Svb_tab_order[58:114,], main="Heatmap", color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = "/Users/alexmanchenoferris/Documents/Chipseq_ActRep/heatmap/heatmap_updown_57seconds.pdf" )
# 
# pheatmap(updown_Svb_tab_order[115:171,], main="Heatmap", color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = "/Users/alexmanchenoferris/Documents/Chipseq_ActRep/heatmap/heatmap_updown_57_3.pdf" )
# 
# pheatmap(updown_Svb_tab_order[172:228,], main="Heatmap", color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = "/Users/alexmanchenoferris/Documents/Chipseq_ActRep/heatmap/heatmap_updown_57_4.pdf" )
# 
# pheatmap(updown_Svb_tab_order[229:285,], main="Heatmap", color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = "/Users/alexmanchenoferris/Documents/Chipseq_ActRep/heatmap/heatmap_updown_57_5.pdf" )
# 
# 
# #automatisation 
# gene_num =  seq(1,nrow(updown_Svb_tab_order),15)
# pas = seq(1,length(gene_num)-1,1)
# 
# for (i in pas){
#   print(i)
#     d = paste('Heatmap', gene_num[i] , gene_num[i+1]-1, sep = "-")
#     name_file = paste(d,"pdf" , sep = ".")
#     
#   pheatmap(updown_Svb_tab_order[gene_num[i]:(gene_num[i+1]-1), ],main=d, color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = name_file, fontsize = 7)
#   if (i == length(pas)){
#     d = paste('/Users/alexmanchenoferris/Documents/Chipseq_ActRep/heatmap/Heatmap_auto', gene_num[i+1] , nrow(updown_Svb_tab_order), sep = "-")
#     name_file = paste(d,"pdf" , sep = ".")
#     pheatmap(updown_Svb_tab_order[gene_num[i+1]:nrow(updown_Svb_tab_order),],main=d, color = colorRampPalette(rev(brewer.pal(n = 7, name = "PiYG")))(length(breaksList)),  breaks = breaksList ,cluster_rows = F, cluster_cols = F, cellwidth = 30,border_color = "black", cellheight = 15,display_numbers = T, number_color = "black", filename = name_file, fontsize = 7)
#   }
#   
# }

On obtient un groupe de gènes cibles de 285 gènes qui sont activés et réprimés par Svb tout en ayant une différence de comportement significatives quand on compare les conditions Activateur vs Répresseur.

On veut maintenant attribuer à ces gènes cibles les pics de fixation de Shavenbaby. Pour cela j’utilise deux approches une première que l’on pourrait qualifier de Peak centric, puis une seconde que l’on peut dire Gene centric.

2 Recherche des gènes cibles directes

2.1 Approche Pics centriques

2.1.1 Méthode 1: Recherche d’association genome-wide des régions de fixation de Shavenbaby

Ce que je fais ici, c’est prendre les régions des fixations de Svb ( analyse des données de ChIPseq ) et les associer à tous les gènes de la drosophile. Pas vraiment ce que l’on cherche ici!

#Récupération des pics de Svb
load("/Users/alexmanchenoferris/Documents/Chipseq_ActRep/Myworkspace.RData")
B1_peaks = read.table("/Users/alexmanchenoferris/Documents/Chipseq_ActRep/macs/stringent/macs2_B1_peaks.narrowPeak_chr.bed",sep = "\t", comment.char = "#", header = F, stringsAsFactors = F)
Svb_peaks = B1_peaks[,1:3]
colnames(Svb_peaks) = c("chrom","start","end")
#histogramme de la distance des peaks 
plot_dist_to_tss(peaks = Svb_peaks, genome = "dm6")

library(clusterProfiler)
library(org.Dm.eg.db)
et <- bitr(updown_Svb, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Dm.eg.db")
head(et)
##       SYMBOL ENTREZID
## 1       Acer    34189
## 2  Acox57D-d    37446
## 3  Acox57D-p    37445
## 4       Adk1    39396
## 5     AhcyL1    38342
## 6 alpha-Est7    40901
manquant = read.table("/Users/alexmanchenoferris/Documents/Chipseq_ActRep/transform_geneID.txt")
colnames(manquant) = c("ID_htseq", "FlybaseID", "Flybase_ID", "gene_symbol_Flybase")
et_manquant = bitr(manquant$gene_symbol_Flybase, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Dm.eg.db")
gene_set = rbind(et, et_manquant)
all_ENTREZID = bitr( rownames(lrt.2.tables$ActVsContr),fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Dm.eg.db")
'%!in%' <- function(x,y)!('%in%'(x,y))

2.1.1.1 Assignement dans des gènes aux pics ( Reactomes)

all_manquant = rownames(lrt.2.tables$ActVsContr)[rownames(lrt.2.tables$ActVsContr)%!in%all_ENTREZID$SYMBOL]
peak_assign_peak_centric = chipenrich(peaks = Svb_peaks, genome = 'dm6', genesets = "reactome",
locusdef = "1kb", qc_plots = FALSE, out_name = NULL, n_cores = 1)
## Reading peaks from data.frame...
## Assigning peaks to genes with assign_peaks(...) ..
## Test: ChIP-Enrich
## Genesets: Reactome
## Running tests..
datatable(peak_assign_peak_centric$peaks, caption = "Peaks assignement to genes Reactomes")
datatable(peak_assign_peak_centric$peaks_per_gene,rownames = F,caption = "Count fo genes per peaks ", filter = "top")
datatable(peak_assign_peak_centric$results[,1:10],rownames = F,caption = "Gene set enrichment test on each geneset ")

2.1.1.2 Assignement dans des gènes aux pics (GOMF)

peak_assign_peak_centric = chipenrich(peaks = Svb_peaks, genome = 'dm6', genesets = "GOMF",
locusdef = "1kb", qc_plots = FALSE, out_name = NULL, n_cores = 1)
## Reading peaks from data.frame...
## Assigning peaks to genes with assign_peaks(...) ..
## Test: ChIP-Enrich
## Genesets: Gene Ontology Molecular Function
## Running tests..
datatable(peak_assign_peak_centric$peaks, caption = "Peaks assignement to genes GOMF")
datatable(peak_assign_peak_centric$peaks_per_gene,rownames = F,caption = "Count fo genes per peaks ", filter = "top")
datatable(peak_assign_peak_centric$results[,1:10],rownames = F,caption = "Gene set enrichment test on each geneset ")

2.1.1.3 Assignement dans des gènes aux pics (GOCC)

peak_assign_peak_centric = chipenrich(peaks = Svb_peaks, genome = 'dm6', genesets = "GOCC",
locusdef = "1kb", qc_plots = FALSE, out_name = NULL, n_cores = 1)
## Reading peaks from data.frame...
## Assigning peaks to genes with assign_peaks(...) ..
## Test: ChIP-Enrich
## Genesets: Gene Ontology Cellular Component
## Running tests..
datatable(peak_assign_peak_centric$peaks, caption = "Peaks assignement to genes GOCC")
datatable(peak_assign_peak_centric$peaks_per_gene,rownames = F,caption = "Count fo genes per peaks ", filter = "top")
datatable(peak_assign_peak_centric$results[,1:10],rownames = F,caption = "Gene set enrichment test on each geneset ")

2.1.1.4 Assignement dans des gènes aux pics (GOBP)

peak_assign_peak_centric = chipenrich(peaks = Svb_peaks, genome = 'dm6', genesets = "GOBP",
locusdef = "1kb", qc_plots = FALSE, out_name = NULL, n_cores = 1)
## Reading peaks from data.frame...
## Assigning peaks to genes with assign_peaks(...) ..
## Test: ChIP-Enrich
## Genesets: Gene Ontology Biological Process
## Running tests..
datatable(peak_assign_peak_centric$peaks, caption = "Peaks assignement to genes GOCC")
datatable(peak_assign_peak_centric$peaks_per_gene,rownames = F,caption = "Count fo genes per peaks ", filter = "top")
datatable(peak_assign_peak_centric$results[,1:10],rownames = F,caption = "Gene set enrichment test on each geneset ")

2.1.2 Méthode 2 : Association des gènes les plus proches des pics

J’ai pris les pics que j’avais associé à des gènes " le plus proche " avec le package ChIPpeakAnno et je regarde combien de gènes sont commun avec les gènes cibles.

binding_gene = read.table("/Users/alexmanchenoferris/Documents/RNAseq/RNAseq_S2/gene_binding_svb.txt")
venn.diagram(
     x = list( updownSvb = updown_Svb,
              binding_gene =  binding_gene$x),
     main = "Comparaison Genes Cibles et binding ",
     filename = "venndia_updown_ACTREPSign_bind.png" , col = c("purple","blue"),height = 5000,width = 5000, imagetype = "png")
## [1] 1
overlap_commun = calculate.overlap(x = list("updown" = updown_Svb , "binding" = binding_gene$x))
#calcul de la pval
phyper(length(overlap_commun$a3),length(overlap_commun$a1),nrow(RNAseq)-length(overlap_commun$a1),length(overlap_commun$a2), lower.tail = FALSE)
## [1] 4.23056e-22
Comparaison des gènes cibles et fixation de Svb
bind_updown = updown_Svb_tab[rownames(updown_Svb_tab)%in%binding_gene$x,]
bind_updown$gene =rownames(bind_updown)

venn.diagram(
     x = list( upAct = row.names(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC > 0 & lrt.2.tables$ActVsContr$BH.bin ==1 ],
               downAct = row.names(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC < 0 & lrt.2.tables$ActVsContr$BH.bin ==1],
              binding_gene =  binding_gene$x),
     main = "Comparaison Genes DE Act et binding ",
     filename = "/Users/alexmanchenoferris/Documents/RNAseq/venndiagramm_geneciblesDirecte/venndia_DEAct_bind.png" , col = c("green","darkgreen","blue"),height = 5000,width = 5000, imagetype = "png")
## [1] 1
Comparaison des gènes cibles de l’Activateur et fixation de Svb
venn.diagram(
     x = list( upAct = row.names(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC > 0 ],
               downRep = row.names(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC < 0 ],
               signActRep = row.names(lrt.2.tables$ActVsRep)[lrt.2.tables$ActVsRep$BH.bin == 1],
              binding_gene =  binding_gene$x),
     main = "Comparaison upAct, downRep,signActRep, binding ",
     filename = "venndia_tout_bind.png" , col = c("green","red","blue", "purple"),height = 5000,width = 5000,imagetype = "png")
## [1] 1
Comparaison des gènes cibles et fixation de Svb
venn.diagram(
     x = list( downAct = row.names(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC < 0],
              binding_gene =  binding_gene$x),
     main = "Comparaison Genes downACT et binding ",
     filename = "/Users/alexmanchenoferris/Documents/RNAseq/venndiagramm_geneciblesDirecte/venndia_downAct_bind.png" , col = c("green","blue"),height = 5000,width = 5000, imagetype = "png")
## [1] 1
#calcul de la pval
overlap_commun = calculate.overlap(x = list("downAct" = row.names(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC < 0] , "binding" = binding_gene$x))
phyper(length(overlap_commun$a3),length(overlap_commun$a1),nrow(RNAseq)-length(overlap_commun$a1),length(overlap_commun$a2), lower.tail = FALSE)
## [1] 0.0005195547
Comparaison des gènes réprimés par Svb Activateur et fixation de Svb
venn.diagram(
     x = list( downRep = row.names(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsContr$logFC < 0],
              binding_gene =  binding_gene$x),
     main = "Comparaison Genes downRep et binding ",
     filename = "/Users/alexmanchenoferris/Documents/RNAseq/venndiagramm_geneciblesDirecte/venndia_downRep_bind.png" , col = c("red","blue"),height = 5000,width = 5000,imagetype = "png")
## [1] 1
overlap_commun = calculate.overlap(x = list("downRep" = row.names(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsContr$logFC < 0] , "binding" = binding_gene$x))
phyper(length(overlap_commun$a3),length(overlap_commun$a1),nrow(RNAseq)-length(overlap_commun$a1),length(overlap_commun$a2), lower.tail = FALSE)
## [1] 8.467327e-10
Comparaison des gènes réprimés par Svb Répresseur et fixation de Svb
venn.diagram(
     x = list( upRep = row.names(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsContr$logFC > 0],
               downRep = row.names(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsContr$logFC < 0],
              binding_gene =  binding_gene$x),
     main = "Comparaison Genes DERep et binding ",
     filename = "../venndiagramm_geneciblesDirecte/venndia_DE_Rep_bind.png" , col = c("red","darkred","blue"),height = 5000,width = 5000,imagetype = "png")
## [1] 1
Comparaison des gènes cibles par Svb Répresseurs et fixation de Svb
venn.diagram(
     x = list( upRep = row.names(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsContr$logFC > 0 & lrt.2.tables$RepVsContr$BH.bin == 1],
              binding_gene =  binding_gene$x),
     main = "Comparaison Genes upRep et binding ",
     filename = "/Users/alexmanchenoferris/Documents/RNAseq/venndiagramm_geneciblesDirecte/venndia_upRep_bind.png" , col = c("red","blue"),height = 5000,width = 5000,imagetype = "png")
## [1] 1
overlap_commun = calculate.overlap(x = list("upRep" = row.names(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsContr$logFC > 0] , "binding" = binding_gene$x))
phyper(length(overlap_commun$a3),length(overlap_commun$a1),nrow(RNAseq)-length(overlap_commun$a1),length(overlap_commun$a2), lower.tail = FALSE)
## [1] 1.23871e-27
Comparaison des gènes activés par Svb Répresseur et fixation de Svb
venn.diagram(
     x = list( upAct = row.names(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC > 0 & lrt.2.tables$ActVsContr$BH.bin == 1],
               downRep = row.names(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsContr$logFC < 0 & lrt.2.tables$RepVsContr$BH.bin == 1 ],
              binding_gene =  binding_gene$x),
              main = "Comparaison Genes upAct , downRep et binding ",
     filename = "/Users/alexmanchenoferris/Documents/RNAseq/venndiagramm_geneciblesDirecte/venndia_upAct_downRep_binding.png" ,height = 5000,width = 5000, imagetype = "png")
## [1] 1
Comparaison des gènes activés par Svb Activateur, des gènes par Svb Répresseur et fixation de Svb
venn.diagram(
     x = list( upAct = row.names(lrt.2.tables$ActVsContr)[lrt.2.tables$ActVsContr$logFC > 0],
               
              signActRep = rownames(lrt.2.tables$ActVsRep)[lrt.2.tables$ActVsRep$BH.bin == 1],
                
               downRep = row.names(lrt.2.tables$RepVsContr)[lrt.2.tables$RepVsContr$logFC < 0],
              binding_gene =  binding_gene$x),
              main = "Comparaison Genes Cibles  et binding ",
     filename = "/Users/alexmanchenoferris/Documents/RNAseq/venndiagramm_geneciblesDirecte/venndia_tout.png" ,height = 5000,width = 5000,imagetype = "png")
## [1] 1
Comparaison des gènes activés par Svb Activateur, des gènes par Svb Répresseur et fixation de Svb

J’obtiens avec cette première méthode 172 gènes cibles présentant une fixation de Svb. On peut dire que ce sont des gènes cibles directes (ils sont avec leur LogFC dans le fichier gene_centric_methode1_geneciblesdirecte.txt ).

2.2 Approche gene centrique

Dans cette approche Gene centrique, ce que j’ai fais c’est de calculer la distances entre les coordonnées des gènes cibles et les coordonnées des pics de fixation de Shavenbaby.

Ici ce que je fais c’est que je prend les gènes cibles, je prend leur coordonnées et pas que leur nom contrairement à la Pics Centrique Méthode 2 puis je compare ces gènes avec les picss qui ont aussi leurs coordonnées

2.2.1 Obtention des coordonnées des gènes cibles

target_set = Dm6_gene[names(Dm6_gene)%in%updown_Svb,]
datatable(as.data.frame(target_set))

2.2.2 Gènes chevauchant avec des pics

table(countOverlaps(B1.peaks,target_set))
## 
##    0    1    2 
## 5172  326   14
count_overlaps = as.data.frame(table(countOverlaps(B1.peaks,target_set)))
barplot(count_overlaps$Freq, main = " Nombre de gènes dans pics", col = c('#67001f','#b2182b','#d6604d'), xlab = "gènes", ylab = "pics", names.arg = count_overlaps$Var1)

overlaps_ChIPseq_RNA = as.data.frame(findOverlaps(B1.peaks, target_set,type = "any", select = "all"))
target_set_tab = as.data.frame(target_set)
target_set_tab$row = row(target_set_tab)[,1]
target_set_tab$gene = names(target_set)


table(countOverlaps(target_set,B1.peaks))
## 
##   0   1   2   3   4   5   6   7   9  10  17 
## 107  98  40  21   7   6   2   1   1   1   1
count_overlaps = as.data.frame(table(countOverlaps(target_set,B1.peaks)))
barplot(count_overlaps$Freq,legend.text = count_overlaps$Var1, col = c('#67001f','#b2182b','#d6604d','#f4a582','#fddbc7','#f7f7f7','#d1e5f0','#92c5de','#4393c3','#2166ac','#053061'), main = " Nombre de pics compris ans les gènes cibles" , names.arg = count_overlaps$Var1,ylab = "# genes", xlab = "# peaks")

overlaps_RNA_ChIPseq = as.data.frame(findOverlaps(target_set,B1.peaks,type = "any", select = "all"))
target_set_tab = as.data.frame(target_set)
target_set_tab$row = row(target_set_tab)[,1]
target_set_tab$gene = names(target_set)

overlaps_RNA_ChIPseq$gene = target_set_tab$gene[match(overlaps_RNA_ChIPseq$queryHits, target_set_tab$row)]
gene_overlapant_RNA_ChIPseq = unique(overlaps_RNA_ChIPseq$gene)

Les gènes CG4297, CG3036, senju, stai, mmy, Pvf3, CG7149, LKRSDH, pes, CG7627, grk, Acer, CG17834, Sema1a, Spn31A, CG7384, piwi, Samuel, dmGlut, CG6770, vir-1, CG15279, lace, c(2)M, Socs36E, ham, CG9331, CG42238, CG2201, CR43148, lt, ap, CG43340, CG1882, pdm3, CG12780, CG8213, hig, Cyp4p3, wun, brp, CG12926, Mos, CR45140, CG6553, CR44367, Cyp6a22, Cyp6a20, CG30091, Cdk4, CR44375, mbl, CG15111, cora, CG11961, sm, exu, Lapsyn, CG9485, Acox57D-p, CG34370, CG2852, CG3831, CG9815, ItgaPS5, Orcokinin, CR45805, CR43334, bab2, rho, pns, CG5707, CG45186, Src64B, axo, CG18769, CR32385, CG8620, CG8596, pst, Fhos, bol, path, CG6084, Adk1, mirr, nuf, fz, bbg, Gbs-70E, sff, CG7341, CG9629, CG13252, Eip78C, CG7470, CR45174, CG11739, CG34357, Cdep, Cerk, alpha-Est7, CR45549, CR18228, CG8112, CG18473, RhoL, hth, CG14688, CG5281, ClC-a, GC1, GstD9, GstD1, GstD3, NANS, sqd, CG34383, CG14855, RpS5b, CR45600, CG3984, alpha-Man-IIb, spn-E, glob1, CG6006, CG6126, Fas1, Mvl, dnd, wake, Lsd-1, CG33111, Pli, CG6178, crb, mld, CG10550, CG14545, CG31324, CG12290, Tusp, CG12428, CG11897, Zip99C, CG9743, Fer1HCH, CG1544, dati, zfh2, apolpp, Cadps, CG3777, CG32816, CG32795, CR44498, CG14438, CG9650, CR44357, CG42797, Gip, l(1)G0289, Karl, CG43921, Pde9, rdgB, inaE, sog, Rhp, dpr18, Grip84, Hers, amn, CG32521, l(1)G0196, CR43493, CR44997, fog possédant des pics.

2.2.3 Calcul de la distance des gènes cibles aux pics

dist_peak_gene = distanceToNearest(B1.peaks,target_set,ignore.strand = T,select = "all")
dist_tab_peak_gene = as.data.frame(dist_peak_gene)
dist_tab_peak_gene$gene = target_set_tab$gene[match(dist_tab_peak_gene$subjectHits, target_set_tab$row)]
overlaps_ChIPseq_RNA$distance = dist_tab_peak_gene$distance[match(overlaps_ChIPseq_RNA$queryHits, dist_tab_peak_gene$queryHits)]
overlaps_ChIPseq_RNA$gene = target_set_tab$gene[match(overlaps_ChIPseq_RNA$subjectHits, target_set_tab$row)]
colnames(dist_tab_peak_gene) = c("peaks_id","target_id","distance","gene")
dist_tab_peak_gene_10kb =dist_tab_peak_gene[dist_tab_peak_gene$distance <= 10000,]
dist_tab_peak_gene_5kb = dist_tab_peak_gene[dist_tab_peak_gene$distance <=5000,]


dist = distanceToNearest(target_set,B1.peaks,ignore.strand = T,select = "all")
dist_tab = as.data.frame(dist)
dist_tab$gene = target_set_tab$gene[match(dist_tab$queryHits, target_set_tab$row)]
overlaps_RNA_ChIPseq$distance = dist_tab$dist[match(overlaps_RNA_ChIPseq$gene, dist_tab$gene)]

row = c("Minimun","1st Qu.","Median","Mean","3rd Qu.", "Max")
distance = c(0,0,0,4453,0,1049564)
summary  = data.frame(distance,row.names = row)
datatable(summary)

On peut observer qu’en moyenne les pics présentent une distance à moins de 5kb des gènes.

2.2.3.1 Représentation graphique du nombre de gènes par pics

round(c(100*98/178,100*40/178,100*21/178,100*7/178,6*100/178,100*2/178,1*100/178,100*1/178,100*1/178,100*1/178),2)
##  [1] 55.06 22.47 11.80  3.93  3.37  1.12  0.56  0.56  0.56  0.56
resultat_overlap = data.frame(genes = c(98,40,21,7,6,2,1,1,1,1), peaks = c(1,2,3,4,5,6,7,9,10,17), pourcentage = c("55.06%","22.47%","11.80%","3.93%","3.37%","1.12%","0.56%","0.56%","0.56%","0.56%"))

barplot(resultat_overlap$genes, names.arg = resultat_overlap$peaks, xlab = "Number of peaks", ylab = "Number of genes", legend.text = resultat_overlap$pourcentage, col = c('#67001f','#b2182b','#d6604d','#f4a582','#fddbc7','#f7f7f7','#2166ac','#2166ac','#2166ac','#2166ac'))

  • Il y a 107 gènes contenant 0 pic
  • Il y a 98 gènes contenant 1 pic
  • Il y a 40 gènes contenant 2 pics
  • Il y a 21 gènes contenant 3 pics
  • Il y a 7 gènes contenant 4 pics
  • Il y a 6 gènes contenant 5 pics
  • Il y a 1 gène contenant 7 pics
  • Il y a 1 gène contenant 9 pics
  • Il y a 1 gène contenant 10 pics
  • Il y a 1 gène contenant 17 pics

2.2.3.2 Représentation de la distance entre les gènes et les pics

dist_tab$factor = rep("gene", nrow(dist_tab))


fig = ggplot(dist_tab, aes(x= factor,y = distance, color = factor)) + geom_violin() +scale_fill_brewer(palette="Dark2") +ggtitle("Distribution de la distance des pics aux gènes")
ggplotly(fig)
vioplot(dist_tab_peak_gene_5kb$distance,col="blue", main = "Distribution de la distance des pics aux gènes", ylab = "distance (pb)")

dist_tab_peak_gene_5kb$factor = rep("gene", nrow(dist_tab_peak_gene_5kb))
fig = ggplot(dist_tab_peak_gene_5kb, aes(x= factor,y = distance, color = factor)) + geom_violin() +scale_fill_brewer(palette="Dark2") +ggtitle("Distribution de la distance des pics aux gènes (0-5kb)")
ggplotly(fig)
dist_tab_peak_gene_10kb$factor = rep("gene", nrow(dist_tab_peak_gene_10kb))
fig = ggplot(dist_tab_peak_gene_10kb, aes(x= factor,y = distance, color = factor)) + geom_violin() +scale_fill_brewer(palette="Dark2") +ggtitle("Distribution de la distance des pics aux gènes (0-10kb)")
ggplotly(fig)

2.2.3.3 Représentation graphique de la distance des pics aux gènes

t = dist_tab[dist_tab$distance==0 ,]
unique(t$gene)
##   [1] "CG4297"        "CG3036"        "senju"         "stai"         
##   [5] "mmy"           "Pvf3"          "CG7149"        "LKRSDH"       
##   [9] "pes"           "CG7627"        "grk"           "Acer"         
##  [13] "CG17834"       "Sema1a"        "Spn31A"        "CG7384"       
##  [17] "piwi"          "Samuel"        "dmGlut"        "CG6770"       
##  [21] "vir-1"         "CG15279"       "lace"          "c(2)M"        
##  [25] "Socs36E"       "ham"           "CG9331"        "CG42238"      
##  [29] "CG2201"        "CR43148"       "lt"            "ap"           
##  [33] "CG43340"       "CG1882"        "pdm3"          "CG12780"      
##  [37] "CG8213"        "hig"           "Cyp4p3"        "wun"          
##  [41] "brp"           "CG12926"       "Mos"           "CR45140"      
##  [45] "CG6553"        "CR44367"       "Cyp6a22"       "Cyp6a20"      
##  [49] "CG30091"       "Cdk4"          "CR44375"       "mbl"          
##  [53] "CG15111"       "cora"          "CG11961"       "sm"           
##  [57] "exu"           "Lapsyn"        "CG9485"        "Acox57D-p"    
##  [61] "CG34370"       "CG2852"        "CG3831"        "CG9815"       
##  [65] "ItgaPS5"       "Orcokinin"     "CR45805"       "CR43334"      
##  [69] "bab2"          "rho"           "pns"           "CG5707"       
##  [73] "CG45186"       "Src64B"        "axo"           "CG18769"      
##  [77] "CR32385"       "CG8620"        "CG8596"        "pst"          
##  [81] "Fhos"          "bol"           "path"          "CG6084"       
##  [85] "Adk1"          "mirr"          "nuf"           "fz"           
##  [89] "bbg"           "Gbs-70E"       "sff"           "CG7341"       
##  [93] "CG9629"        "CG13252"       "Eip78C"        "CG7470"       
##  [97] "CR45174"       "CG11739"       "CG34357"       "Cdep"         
## [101] "Cerk"          "alpha-Est7"    "CR45549"       "CR18228"      
## [105] "CG8112"        "CG18473"       "RhoL"          "hth"          
## [109] "CG14688"       "CG5281"        "ClC-a"         "GC1"          
## [113] "GstD9"         "GstD1"         "GstD3"         "NANS"         
## [117] "sqd"           "CG34383"       "CG14855"       "RpS5b"        
## [121] "CR45600"       "CG3984"        "alpha-Man-IIb" "spn-E"        
## [125] "glob1"         "CG6006"        "CG6126"        "Fas1"         
## [129] "Mvl"           "dnd"           "wake"          "Lsd-1"        
## [133] "CG33111"       "Pli"           "CG6178"        "crb"          
## [137] "mld"           "CG10550"       "CG14545"       "CG31324"      
## [141] "CG12290"       "Tusp"          "CG12428"       "CG11897"      
## [145] "Zip99C"        "CG9743"        "Fer1HCH"       "CG1544"       
## [149] "dati"          "zfh2"          "apolpp"        "Cadps"        
## [153] "CG3777"        "CG32816"       "CG32795"       "CR44498"      
## [157] "CG14438"       "CG9650"        "CR44357"       "CG42797"      
## [161] "Gip"           "l(1)G0289"     "Karl"          "CG43921"      
## [165] "Pde9"          "rdgB"          "inaE"          "sog"          
## [169] "Rhp"           "dpr18"         "Grip84"        "Hers"         
## [173] "amn"           "CG32521"       "l(1)G0196"     "CR43493"      
## [177] "CR44997"       "fog"
nrow(dist_tab[dist_tab$distance>0 & dist_tab$distance<=1000,])
## [1] 30
nrow(dist_tab[dist_tab$distance> 1000 & dist_tab$distance<=3000,])
## [1] 29
nrow(dist_tab[dist_tab$distance> 3000 & dist_tab$distance<=5000,])
## [1] 10
nrow(dist_tab[dist_tab$distance> 5000 & dist_tab$distance<=10000,])
## [1] 16
nrow(dist_tab[dist_tab$distance> 10000 ,])
## [1] 22
pielabels = c("0kb","0-1kb","1kb-3kb","3kb-5kb","5kb-10kb",">10kb")
resultat = data.frame(distance = c("0kb","0-1kb","1kb-3kb","3kb-5kb","5kb-10kb",">10kb"), genes = c(178,30,29,10,16,22), pourcentage = c("62.5%","10.5%","10.2%","3.5%","5.6%","7.7%"))
fig <- plot_ly(resultat, labels = ~distance, values = ~genes, type = 'pie')
fig <- fig %>% layout(title = 'Distance des gènes cibles aux pics ')
pie(resultat$genes, labels = resultat$pourcentage, col = c("#ffffff","#e9a3c9","#c51b7d","#e6f5d0","#a1d76a","#4d9221"), main = "Distance between Svb peaks and its target genes" )
legend("bottomright",legend=pielabels,fill= c("#ffffff","#e9a3c9","#c51b7d","#e6f5d0","#a1d76a","#4d9221"))

  • Il y a 178 gènes avec au moins 1 pic overlappant
  • 30 gènes avec des pics à distance comprise entre 0 et 1kb
  • 29 gènes avec des pics à distance comprise entre 1kb et 3kb
  • 10 gènes avec des pics à distance comprise entre 3kb et 5kb
  • 16 gènes avec des pics à distance comprise entre 5kb et 10kb
  • 22 gènes avec des pics à plus de 10kb

3 Groupes de gènes

La colonnes chr, start , end correspondent aux coordonnées des pics dans les tableaux dist_tab_etc

3.1 Distance entre les gènes cibles et les pics de Shavenbaby

B1_peaks_tab = as.data.frame(B1.peaks)
B1_peaks_tab$names =  rownames(B1_peaks_tab)
B1_peaks_tab$row = row(B1_peaks_tab)[,1]
dist_tab$chr = B1_peaks_tab$seqnames[match( dist_tab$subjectHits, B1_peaks_tab$row )]
dist_tab$start = B1_peaks_tab$start[match( dist_tab$subjectHits, B1_peaks_tab$row )]
dist_tab$end = B1_peaks_tab$end[match( dist_tab$subjectHits, B1_peaks_tab$row )]
dist_tab$names = B1_peaks_tab$names[match( dist_tab$subjectHits, B1_peaks_tab$row )]
dist_tab$scores = B1_peaks_tab$score[match( dist_tab$subjectHits, B1_peaks_tab$row )]
dist_tab$fold = B1_peaks_tab$fold_enrichment[match( dist_tab$subjectHits, B1_peaks_tab$row )]

datatable(dist_tab[,3:10], filter = "top", caption = "Distance entre les gènes cibles et les pics de Shavenbaby")

3.2 Gènes cibles jusqu’à 10kb

dist_tab_gene_peak_10kb = dist_tab[dist_tab$distance<=10000,]
datatable(dist_tab_gene_peak_10kb[,3:10], filter = "top", caption = "Les gènes cibles et les pics de Shavenbaby dont la distance est <= 10kb")

3.3 Gènes jusqu’à 5kb

dist_tab_gene_peak_5kb = dist_tab[dist_tab$distance<=5000,]
datatable(dist_tab_gene_peak_5kb[,3:10], filter = "top", caption = "Les gènes cibles et les pics de Shavenbaby dont la distance est <= 5kb")

3.4 Gènes entre 5 et 10kb

dist_tab_peak_gene_5kb_10kb = dist_tab[dist_tab$distance>5000 & dist_tab$distance<=10000,]
datatable(dist_tab_peak_gene_5kb_10kb[,3:10], filter = "top", caption = "Les gènes cibles et les pics de Shavenbaby dont la distance est comprise entre 5 et 10kb")

3.5 Gènes au-delà de 10kb

dist_tab_peak_gene_audela10kb = dist_tab[dist_tab$distance> 10000 ,]
datatable(dist_tab_peak_gene_audela10kb[,3:10], filter = "top", caption = "Les gènes cibles et les pics de Shavenbaby dont la distance est supérieure à 10kb")

3.6 Gènes contenant des pics

peak_overlaping_gene = dist_tab[dist_tab$distance==0,]
peak_overlaping_gene_GR = GRanges(seqnames = Rle(peak_overlaping_gene$chr), 
                     ranges=IRanges(start = peak_overlaping_gene$start,end = peak_overlaping_gene$end, names = peak_overlaping_gene$names),
                     strand = Rle(rep("*",nrow(peak_overlaping_gene))),
                     score = peak_overlaping_gene$scores,
                   fold_enrichment = peak_overlaping_gene$fold)
datatable(peak_overlaping_gene[,3:10], filter = "top", caption = "Les gènes cibles et les pics de Shavenbaby chevauchant ")
non_associed_peaks= B1_peaks_tab[B1_peaks_tab$names%!in%dist_tab$names,]

3.7 Coordonnées des gènes cibles directes

3.7.1 Gènes cibles directes (0-5kb)

gene_0_5kb = target_set_tab[,1:5][unique(dist_tab_gene_peak_5kb$gene),]
gene_0_5kb$gene = rownames(gene_0_5kb)
datatable(gene_0_5kb ,caption = "Genes cibles directes (0-5kb)")

3.7.2 Gènes cibles directes (5-10kb)

gene_5_10kb = target_set_tab[,1:5][unique(dist_tab_peak_gene_5kb_10kb$gene),]
gene_5_10kb$gene = rownames(gene_5_10kb) 
datatable(gene_5_10kb,caption  = "Gènes cibles directes (5-10kb)")

3.7.3 Gènes cibles ( au- delà de 10kb)

gene_audela_10kb = target_set_tab[,1:5][unique(dist_tab_peak_gene_audela10kb$gene),]
gene_audela_10kb$gene = rownames(gene_audela_10kb)
datatable(gene_audela_10kb, caption = "Gènes cibles ( au-delà de 10kb")
Dm6_gene_tab = as.data.frame(Dm6_gene)
Dm6_gene_tab$gene = rownames(Dm6_gene_tab)
non_associed_gene_peaks =  Dm6_gene_tab[,1:5][Dm6_gene_tab$gene%!in%dist_tab$gene,]
non_associed_gene_peaks$gene = rownames(non_associed_gene_peaks)


promoters_gene_0_5kb = promoters( GRanges(seqnames = gene_0_5kb$seqnames,
                               ranges = IRanges(start = gene_0_5kb$start, end = gene_0_5kb$end, names = gene_0_5kb$gene),
                               strand = gene_0_5kb$strand) ) 

promoters_gene_0_5kb_tab = as.data.frame(promoters_gene_0_5kb)

promoters_gene_5_10kb = promoters( GRanges(seqnames = gene_5_10kb$seqnames,
                               ranges = IRanges(start = gene_5_10kb$start, end = gene_5_10kb$end, names = gene_5_10kb$gene),
                               strand = gene_5_10kb$strand) ) 

promoters_gene_5_10kb_tab = as.data.frame(promoters_gene_5_10kb)

promoters_gene_audela_10kb = promoters( GRanges(seqnames = gene_audela_10kb$seqnames,
                               ranges = IRanges(start = gene_audela_10kb$start, end = gene_audela_10kb$end, names = gene_audela_10kb$gene),
                               strand = gene_audela_10kb$strand)) 

promoters_gene_audela_10kb_tab = as.data.frame(promoters_gene_audela_10kb)

promoters_non_associed_gene_peaks = promoters( GRanges(seqnames = non_associed_gene_peaks$seqnames,
                               ranges = IRanges(start = non_associed_gene_peaks$start, end = non_associed_gene_peaks$end, names = non_associed_gene_peaks$gene),
                               strand = non_associed_gene_peaks$strand)) 
promoters_gene_non_associed_gene_peaks_tab = as.data.frame(promoters_non_associed_gene_peaks)
promoters_gene_non_associed_gene_peaks_tab$gene = rownames(promoters_gene_non_associed_gene_peaks_tab)
flanked_gene_0_5kb = flank( GRanges(seqnames = gene_0_5kb$seqnames,
                               ranges = IRanges(start = gene_0_5kb$start, end = gene_0_5kb$end, names = gene_0_5kb$gene),
                               strand = gene_0_5kb$strand) , 5000,start=TRUE) 

flanked_gene_0_5kb_tab = as.data.frame(flanked_gene_0_5kb)

flanked_gene_5_10kb = flank( GRanges(seqnames = gene_5_10kb$seqnames,
                               ranges = IRanges(start = gene_5_10kb$start, end = gene_5_10kb$end, names = gene_5_10kb$gene),
                               strand = gene_5_10kb$strand) , 5000 , start = T) 

flanked_gene_5_10kb_tab = as.data.frame(flanked_gene_5_10kb)

flanked_gene_audela_10kb = flank( GRanges(seqnames = gene_audela_10kb$seqnames,
                               ranges = IRanges(start = gene_audela_10kb$start, end = gene_audela_10kb$end, names = gene_audela_10kb$gene),
                               strand = gene_audela_10kb$strand), 5000 , start = T ) 

flanked_gene_audela_10kb_tab = as.data.frame(flanked_gene_audela_10kb)

flanked_non_associed_gene_peaks = flank( GRanges(seqnames = non_associed_gene_peaks$seqnames,
                               ranges = IRanges(start = non_associed_gene_peaks$start, end = non_associed_gene_peaks$end, names = non_associed_gene_peaks$gene),
                               strand = non_associed_gene_peaks$strand), 5000, start = T ) 
flanked_gene_non_associed_gene_peaks_tab = as.data.frame(flanked_non_associed_gene_peaks)

flanked_gene_non_associed_gene_peaks_tab$gene = rownames(flanked_gene_non_associed_gene_peaks_tab)

4 Région génomiques des pics chevauchant

4.1 IGV

Pour les gènes contenant des pics j’ai regardé sur IGV dans quel région génomique se trouvaient le pic.

result_overlappant_pic = data.frame(type = c("promotor","5'UTR","1st intron","other introns","other","exons"), nombre = c(71,124,50,55,39,15), pourcentage  = c("20.06%","30.03%","14.12%","15.54%","11.02%","4.24%"))
pielabel_overlappant = result_overlappant_pic$pourcentage
pie(result_overlappant_pic$nombre, labels = result_overlappant_pic$type, col = c("#ffffff","#e9a3c9","#c51b7d","#e6f5d0","#a1d76a","#4d9221"), main = "Overlapping peaks genomic distribution" )
legend("bottomright",legend=pielabel_overlappant,fill= c("#ffffff","#e9a3c9","#c51b7d","#e6f5d0","#a1d76a","#4d9221"))

fig <- plot_ly(result_overlappant_pic, labels = ~type, values = ~nombre, type = 'pie')
fig <- fig %>% layout(title = 'Distribution génomique des pics')
fig

4.2 ChIPpeakanno

Pour faire autrement qu’à l’oeil j’utilise le package ChIPpeakAnno

if(require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)){
    TxDb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
    exons <- exons(TxDb, columns=NULL)
    fiveUTRs <- unique(unlist(fiveUTRsByTranscript(TxDb)))
    #TSS = getAnnotation(TxDb)
    overlap_region = assignChromosomeRegion(peak_overlaping_gene_GR, exons, fiveUTRs,TSS, nucleotideLevel=FALSE, 
                           precedence=c("Promoters", "immediateDownstream", 
                                        "fiveUTRs", "threeUTRs", 
                                        "Exons", "Introns"), 
                           TxDb=TxDb)
}
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 21 out-of-bound ranges located on sequences
##   chr4, chrY_DS483742v1_random, chrY_DS483875v1_random,
##   chrY_DS484142v1_random, chrY_DS484530v1_random, chrY_DS484909v1_random,
##   chrY_DS485423v1_random, chrY_DS485523v1_random, chrY_DS485938v1_random,
##   chrUn_CP007102v1, chrUn_DS483910v1, chrUn_DS484898v1, chrY, and
##   chrUn_DS484581v1. Note that ranges located on a sequence whose length
##   is unknown (NA) or on a circular sequence are not considered
##   out-of-bound (use seqlengths() and isCircular() to get the lengths and
##   circularity flags of the underlying sequences). You can use trim() to
##   trim these ranges. See ?`trim,GenomicRanges-method` for more
##   information.
## Warning in assignChromosomeRegion(peak_overlaping_gene_GR, exons, fiveUTRs, :
## peaks.RD has sequence levels not in TxDb.
res = as.data.frame(overlap_region$percentage)
res$cat = rownames(res)
fig <- plot_ly(res, labels = ~cat, values = ~overlap_region$percentage, type = 'pie')
fig <- fig %>% layout(title = 'Distribution génomique des pics')
fig

5 Gènes ne se retrouvant pas dans les deux méthodes

genes_manquant = target_set_tab$gene[unique(overlaps_RNA_ChIPseq$gene)%!in%bind_updown$gene]

Avec les deux méthodes gene centrique on trouve à peu près le même nombre de gènes chevauchant.Cependant quand on regarde quels sont les gènes qui diffèrent, mmy, mtsh, CG7627, piwi, Socs36E, CG9331, CG42238, CR43148, CG17018, CG43340, CG1882, PPO2, CG13743, wun, brp, CG12926, PCB, Prx2540-2, Mos, cbc, CG6357, CG6553, Hr51, CG43107, CR44386, mbl, GstE6, CG18190, CG15128, CG34370, CG2852, CG3831, rho, CG18769, pst, CG7194, CG6084, CG43085, CG9629, Eip78C, CR43426, Pzl, Cdep, Cerk, RhoL, hth, l(3)neo38, GstD9, GstD7, Cyp9f2, CG14855, CR43472, MtnE, MtnD, dnd, CR43475, CG17119, CG33111, CG31436, CG10550, CG12290, Tusp, CG12428, CG11897, Zip99C, Fer1HCH, dati, zfh2, apolpp, CG9650, CG32700, CG42797, CG45061, Gip, CR44962, sog, Grip84, car, Hers on voit qu’il y a pas mal de différence. De plus en regardant les gènes qui manquent dans la méthode 1 vis à vis de la méthode 2 on peut voir que certains gènes présentent des pics en leur sein.

Je privilégie la Méthode 2 de la méthode Gene centrique qui s’appuie sur les coordonnées des gènes et des pics en calculant la distance pour tous en ne donnant pas de poids contrairement à la Méthode 2 Pics centriques

6 Conclusion

En conclusion pour les 285 gènes cibles de Svb, le facteur de transcription se fixent pour 178 gènes sur le gène. En majorité dans la région soit du promoteur, soit dans la région 5’UTR. Pour les autres gènes, le facteur de transcription se fixent à moins de 5kb du gène régulé, ce qui correspond à ce que l’on connait sur la régulation des gènes cibles par un facteur de transcription chez la drosophile. Shavenbaby régule l’expression de 247 gènes cibles directes.